{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "自动微分计算.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0ToQvCaQZPCs"
      },
      "source": [
        "# 自动微分计算\n",
        "\n",
        "神经网络通常依赖反向传播求梯度来更新网络参数，而手工推导梯度通常是一件非常复杂且容易出错的事情，深度学习框架可以帮助我们自动完成这种求梯度运算。TensorFlow 一般使用梯度磁带 `tf.GradientTape` 来记录正向运算过程，然后反向播放磁带自动得到梯度值。这种利用 `tf.GradientTape` 求微分的方法叫做 TensorFlow 的自动微分机制。"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ieb8arc9Z7Aa"
      },
      "source": [
        "# 利用梯度磁带求导数"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nHkJL-7tZM_y",
        "outputId": "930ccdea-b38d-405b-db7d-671ce714d231",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "\n",
        "\n",
        "import tensorflow as tf\n",
        "import numpy as np \n",
        "\n",
        "# Differentiate f(x) = a*x**2 + b*x + c with respect to x, at x = 0.\n",
        "\n",
        "# x is a tf.Variable; the coefficients are plain constant tensors.\n",
        "x = tf.Variable(initial_value=0.0, dtype=tf.float32, name=\"x\")\n",
        "a, b, c = (tf.constant(coef) for coef in (1.0, -2.0, 1.0))\n",
        "\n",
        "# Record the forward computation so the tape can be replayed backwards.\n",
        "with tf.GradientTape() as tape:\n",
        "    y = a * tf.pow(x, 2) + b * x + c\n",
        "\n",
        "# Analytically dy/dx = 2*a*x + b, i.e. -2.0 at x = 0.\n",
        "dy_dx = tape.gradient(y, x)\n",
        "\n",
        "print(dy_dx)"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "tf.Tensor(-2.0, shape=(), dtype=float32)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Buk4WhGDZtnJ",
        "outputId": "43d63cfe-015e-440a-a5f4-f84ee620dff6",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        }
      },
      "source": [
        "# Constant tensors can be differentiated as well, but they have to be\n",
        "# registered with the tape explicitly via watch().\n",
        "\n",
        "with tf.GradientTape() as tape:\n",
        "    tape.watch([a, b, c])\n",
        "    y = a * tf.pow(x, 2) + b * x + c\n",
        "\n",
        "# One gradient is unpacked per source, in the order the sources are listed.\n",
        "dy_dx, dy_da, dy_db, dy_dc = tape.gradient(y, [x, a, b, c])\n",
        "\n",
        "# Analytically dy/da = x**2 and dy/dc = 1.\n",
        "print(dy_da, dy_dc, sep=\"\\n\")"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "tf.Tensor(0.0, shape=(), dtype=float32)\n",
            "tf.Tensor(1.0, shape=(), dtype=float32)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VuC7JG82ZxmM",
        "outputId": "1209fea4-c4d1-4eb1-a867-0bc546afb5b3",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Second derivative: nest two tapes so that the outer tape records the\n",
        "# gradient computation carried out by the inner one.\n",
        "with tf.GradientTape() as tape2:\n",
        "    with tf.GradientTape() as tape1:\n",
        "        y = a * tf.pow(x, 2) + b * x + c\n",
        "    # Computed inside tape2's context so it can be differentiated again.\n",
        "    dy_dx = tape1.gradient(y, x)\n",
        "dy2_dx2 = tape2.gradient(dy_dx, x)\n",
        "\n",
        "print(dy2_dx2)"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "tf.Tensor(2.0, shape=(), dtype=float32)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8mFaeQraZ1xj",
        "outputId": "9cb70114-a315-423a-a959-e5e63c21d7d6",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        }
      },
      "source": [
        "# GradientTape can also be used inside an AutoGraph-compiled function.\n",
        "\n",
        "@tf.function\n",
        "def f(x):\n",
        "    \"\"\"Return the pair (dy/dx, y) for y = x**2 - 2*x + 1 evaluated at x.\"\"\"\n",
        "    quad_coef = tf.constant(1.0)\n",
        "    lin_coef = tf.constant(-2.0)\n",
        "    offset = tf.constant(1.0)\n",
        "\n",
        "    # Cast the argument to tf.float32; the result is watched explicitly below.\n",
        "    x_f32 = tf.cast(x, tf.float32)\n",
        "    with tf.GradientTape() as tape:\n",
        "        tape.watch(x_f32)\n",
        "        y = quad_coef * tf.pow(x_f32, 2) + lin_coef * x_f32 + offset\n",
        "    dy_dx = tape.gradient(y, x_f32)\n",
        "\n",
        "    return dy_dx, y\n",
        "\n",
        "tf.print(f(tf.constant(0.0)))\n",
        "tf.print(f(tf.constant(1.0)))"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "(-2, 1)\n",
            "(0, 0)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LmzWk2lmZ8sw"
      },
      "source": [
        "# 利用梯度磁带和优化器求最小值"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zoI8DwS0Z_qa",
        "outputId": "900c0555-54a9-49f5-ba97-b7ccd91f7892",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Find the minimum of f(x) = a*x**2 + b*x + c.\n",
        "# Gradients are taken with a GradientTape and applied through\n",
        "# optimizer.apply_gradients.\n",
        "\n",
        "x = tf.Variable(0.0, name=\"x\", dtype=tf.float32)\n",
        "a = tf.constant(1.0)\n",
        "b = tf.constant(-2.0)\n",
        "c = tf.constant(1.0)\n",
        "\n",
        "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n",
        "for _ in range(1000):\n",
        "    with tf.GradientTape() as tape:\n",
        "        y = a * tf.pow(x, 2) + b * x + c\n",
        "    dy_dx = tape.gradient(y, x)\n",
        "    optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])\n",
        "\n",
        "# The y left over from the loop was evaluated before the last update of x,\n",
        "# so re-evaluate it to report the value that matches the printed x.\n",
        "y = a * tf.pow(x, 2) + b * x + c\n",
        "tf.print(\"y =\", y, \"; x =\", x)"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "y = 0 ; x = 0.99999851\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OSOSZpZzaHDE",
        "outputId": "29b3c4d9-d167-403a-ed8e-50cba1095872",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Find the minimum of f(x) = a*x**2 + b*x + c using optimizer.minimize.\n",
        "# optimizer.minimize is equivalent to taking the gradient with a tape and\n",
        "# then calling apply_gradients.\n",
        "# NOTE(review): minimize() may be missing from Keras 3 optimizers\n",
        "# (TF >= 2.16) - confirm against the targeted TensorFlow version.\n",
        "\n",
        "x = tf.Variable(initial_value=0.0, dtype=tf.float32, name=\"x\")\n",
        "\n",
        "# Note that f takes no arguments; it reads the global x.\n",
        "def f():\n",
        "    \"\"\"Return x**2 - 2*x + 1 evaluated at the current value of the global x.\"\"\"\n",
        "    quad_coef = tf.constant(1.0)\n",
        "    lin_coef = tf.constant(-2.0)\n",
        "    offset = tf.constant(1.0)\n",
        "    return quad_coef * tf.pow(x, 2) + lin_coef * x + offset\n",
        "\n",
        "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n",
        "for _ in range(1000):\n",
        "    optimizer.minimize(f, [x])\n",
        "\n",
        "tf.print(\"y =\", f(), \"; x =\", x)"
      ],
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "y = 0 ; x = 0.99999851\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "roGz7C4CaVAY",
        "outputId": "18a2e1ef-1f43-4913-b1d7-37b1a38ff47b",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        }
      },
      "source": [
        "# Solve the minimization inside an AutoGraph-compiled function,\n",
        "# using optimizer.apply_gradients.\n",
        "\n",
        "x = tf.Variable(initial_value=0.0, dtype=tf.float32, name=\"x\")\n",
        "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n",
        "\n",
        "@tf.function\n",
        "def minimizef():\n",
        "    \"\"\"Run 1000 SGD steps on the global x, then return x**2 - 2*x + 1.\"\"\"\n",
        "    quad_coef = tf.constant(1.0)\n",
        "    lin_coef = tf.constant(-2.0)\n",
        "    offset = tf.constant(1.0)\n",
        "\n",
        "    # Under AutoGraph, loop over tf.range(1000) rather than range(1000).\n",
        "    for _ in tf.range(1000):\n",
        "        with tf.GradientTape() as tape:\n",
        "            y = quad_coef * tf.pow(x, 2) + lin_coef * x + offset\n",
        "        dy_dx = tape.gradient(y, x)\n",
        "        optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])\n",
        "\n",
        "    # Evaluate the function once more, now at the final value of x.\n",
        "    y = quad_coef * tf.pow(x, 2) + lin_coef * x + offset\n",
        "    return y\n",
        "\n",
        "tf.print(minimizef())\n",
        "tf.print(x)"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0\n",
            "0.99999851\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BMMlo1DyaaAI",
        "outputId": "b27e1e81-367b-44a0-f252-8ee2a4b388a5",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        }
      },
      "source": [
        "# Solve the minimization inside AutoGraph-compiled functions,\n",
        "# using optimizer.minimize.\n",
        "\n",
        "x = tf.Variable(initial_value=0.0, dtype=tf.float32, name=\"x\")\n",
        "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n",
        "\n",
        "@tf.function\n",
        "def f():\n",
        "    \"\"\"Return x**2 - 2*x + 1 evaluated at the current value of the global x.\"\"\"\n",
        "    quad_coef = tf.constant(1.0)\n",
        "    lin_coef = tf.constant(-2.0)\n",
        "    offset = tf.constant(1.0)\n",
        "    return quad_coef * tf.pow(x, 2) + lin_coef * x + offset\n",
        "\n",
        "@tf.function\n",
        "def train(epoch):\n",
        "    \"\"\"Call optimizer.minimize(f, [x]) `epoch` times, then return f().\"\"\"\n",
        "    for _ in tf.range(epoch):\n",
        "        optimizer.minimize(f, [x])\n",
        "    return f()\n",
        "\n",
        "\n",
        "tf.print(train(1000))\n",
        "tf.print(x)"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0\n",
            "0.99999851\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}